Functions
# Function to extract sample data as dataframe from phyloseq object
samdatAsDataframe <- function(ps) {
samdat <- phyloseq::sample_data(ps)
df <- data.frame(samdat, check.names = FALSE, stringsAsFactors = FALSE)
return(df)
}
# Function to rename variables in phyloseq object
ps_rename <- function(ps, ...) {
ps <- microViz::ps_get(ps)
df <- samdatAsDataframe(ps)
df <- dplyr::rename(.data = df, ...)
phyloseq::sample_data(ps) <- df
return(ps)
}
# SourceFolder function
source(here::here("Code", "R", "Functions", "StartFunctions", "sourceFolder.R"))
# Import all helper functions found in `/Functions`
sourceFolder(here::here("Code", "R", "Functions", "StartFunctions"), T)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Loading required package: future
##
## Attaching package: 'future'
## The following objects are masked from 'package:igraph':
##
## %->%, %<-%
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.18.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## Loading required package: doParallel
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: GUniFrac
## Registered S3 method overwritten by 'rmutil':
## method from
## print.response httr
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:GenomicRanges':
##
## subtract
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
##
## set_names
## The following object is masked from 'package:data.table':
##
## :=
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
## The following object is masked from 'package:Biobase':
##
## exprs
## The following object is masked from 'package:igraph':
##
## is_named
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:scales':
##
## alpha
## The following object is masked from 'package:vegan':
##
## diversity
## The following object is masked from 'package:SummarizedExperiment':
##
## coverage
## The following object is masked from 'package:GenomicRanges':
##
## coverage
## The following objects are masked from 'package:IRanges':
##
## coverage, transform
## The following object is masked from 'package:S4Vectors':
##
## transform
## The following object is masked from 'package:igraph':
##
## diversity
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:Hmisc':
##
## zoom
## The following object is masked from 'package:dplyr':
##
## where
## The following object is masked from 'package:ggpubr':
##
## rotate
## The following objects are masked from 'package:igraph':
##
## degree, edges, mst, ring
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## The following object is masked from 'package:IRanges':
##
## collapse
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## The following object is masked from 'package:S4Vectors':
##
## expand
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
## Loading required package: mvtnorm
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:future':
##
## cluster
## Loading required package: TH.data
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 3 files sourced from folder "/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/R/Functions/StartFunctions"
sourceFolder(here::here("Code", "R", "Functions", "HelperFunctions"), T)
## 9 files sourced from folder "/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/R/Functions/HelperFunctions"
# Function to normalize taxa counts
normalize_taxa_counts <- function(ps) {
# Set seed for reproducibility
set.seed(42)
ps_normalized <- ps %>%
microViz::tax_fix() %>%
microViz::tax_transform("compositional", rank = "Genus") %>%
microViz::tax_transform("log2", zero_replace = "halfmin", chain = TRUE)
return(ps_normalized)
}
# Function to extract top differentially abundant taxa
extract_top_taxa <- function(maaslin_results, taxa_by_tank, top_n = 10) {
# Set seed for reproducibility
set.seed(42)
if (is.null(maaslin_results) || nrow(maaslin_results) == 0) {
return(character(0))
}
# Get available taxa columns from taxa_by_tank
available_taxa <- names(taxa_by_tank)[!names(taxa_by_tank) %in% c("Treatment", "Tank.ID")]
# Get top taxa from MaAsLin2 results
top_taxa <- maaslin_results %>%
dplyr::filter(qval < 0.05) %>%
dplyr::arrange(qval) %>%
dplyr::slice_head(n = top_n) %>%
dplyr::pull(Taxon) %>%
unique()
# Filter to only include taxa that are available in taxa_by_tank
available_top_taxa <- top_taxa[top_taxa %in% available_taxa]
# If we don't have enough available taxa, add more from the results
if (length(available_top_taxa) < top_n && length(top_taxa) > length(available_top_taxa)) {
remaining_taxa <- top_taxa[!top_taxa %in% available_top_taxa]
additional_needed <- top_n - length(available_top_taxa)
additional_taxa <- remaining_taxa[1:min(additional_needed, length(remaining_taxa))]
available_top_taxa <- c(available_top_taxa, additional_taxa)
}
# Print information about matching
cat("Found", length(top_taxa), "significant taxa in MaAsLin2 results\n")
cat("Available in taxa_by_tank:", length(available_top_taxa), "taxa\n")
if (length(top_taxa) > length(available_top_taxa)) {
missing_taxa <- top_taxa[!top_taxa %in% available_top_taxa]
cat("Missing taxa:", paste(missing_taxa[1:min(5, length(missing_taxa))], collapse = ", "))
if (length(missing_taxa) > 5) cat(" ... and", length(missing_taxa) - 5, "more")
cat("\n")
}
return(available_top_taxa)
}
# Function to calculate correlation between taxa and mortality
calculate_taxa_mortality_correlation <- function(taxa_data, mortality_data, method = "pearson") {
# Set seed for reproducibility
set.seed(42)
# Join taxa and mortality data
combined_data <- dplyr::left_join(taxa_data, mortality_data, by = c("Treatment", "Tank.ID"))
# Calculate correlations for each taxon
correlation_results <- list()
# Define columns to exclude (non-taxa columns)
exclude_cols <- c(
"percent_mortality", "n_fish", "n_infected", "mean_worm_burden", "percent_infected",
"Tank.ID", "total_per_group", "dead", "n", "Survived", "percent_survived"
)
# Get numeric columns (taxa abundances only)
taxa_cols <- names(combined_data)[sapply(combined_data, is.numeric)]
taxa_cols <- taxa_cols[!taxa_cols %in% exclude_cols]
# Additional check: only include columns that are likely taxa (not metadata)
# Taxa columns should not contain common metadata patterns
taxa_cols <- taxa_cols[!grepl("^(Treatment|Tank|ID|total|dead|n_|percent_|mean_)", taxa_cols)]
for (taxon in taxa_cols) {
# Calculate correlation
cor_result <- stats::cor.test(
combined_data[[taxon]],
combined_data$percent_mortality,
method = method,
use = "complete.obs"
)
correlation_results[[taxon]] <- data.frame(
Taxon = taxon,
Correlation = cor_result$estimate,
P_value = cor_result$p.value,
Method = method,
N = sum(!is.na(combined_data[[taxon]]) & !is.na(combined_data$percent_mortality)),
stringsAsFactors = FALSE
)
}
# Combine results
results_df <- dplyr::bind_rows(correlation_results) %>%
dplyr::arrange(P_value) %>%
dplyr::mutate(
Significant = P_value < 0.05,
Direction = ifelse(Correlation > 0, "Positive", "Negative")
)
return(results_df)
}
# Function to create correlation heatmap
create_correlation_heatmap <- function(correlation_results, title = "Taxa-Mortality Correlations") {
# Set seed for reproducibility
set.seed(42)
if (nrow(correlation_results) == 0) {
cat("No correlation results available for heatmap visualization.\n")
return(NULL)
}
# Filter for significant correlations only (p < 0.05)
significant_results <- correlation_results %>%
dplyr::filter(P_value < 0.05)
if (nrow(significant_results) == 0) {
cat("No significant correlations found for heatmap visualization.\n")
return(NULL)
}
# Prepare data for heatmap (significant correlations only)
heatmap_data <- significant_results %>%
dplyr::select(Taxon, Correlation, P_value) %>%
dplyr::mutate(
Significance = ifelse(P_value < 0.001, "***",
ifelse(P_value < 0.01, "**",
ifelse(P_value < 0.05, "*", ""))),
Label = paste0(round(Correlation, 3), Significance)
) %>%
dplyr::arrange(P_value)
# Create correlation matrix for heatmap
cor_matrix <- matrix(heatmap_data$Correlation, ncol = 1)
rownames(cor_matrix) <- heatmap_data$Taxon
colnames(cor_matrix) <- "Mortality"
# Create annotation for significance with white to black gradient
annotation_row <- data.frame(
P_value = heatmap_data$P_value,
row.names = heatmap_data$Taxon
)
# Create annotation colors (white to black gradient)
annotation_colors <- list(
P_value = colorRampPalette(c("black", "white"))(100)
)
# Create color palette
max_abs_cor <- max(abs(cor_matrix), na.rm = TRUE)
heatmap_colors <- colorRampPalette(c("blue", "white", "red"))(100)
if (nrow(cor_matrix) < 2) {
cluster_rows <- FALSE
} else {
cluster_rows <- TRUE
}
# Create the heatmap
pheatmap::pheatmap(
cor_matrix,
color = heatmap_colors,
breaks = seq(-max_abs_cor, max_abs_cor, length.out = 101),
annotation_row = annotation_row,
annotation_colors = annotation_colors,
cluster_rows = cluster_rows,
cluster_cols = FALSE,
show_rownames = TRUE,
show_colnames = TRUE,
fontsize_row = 8,
fontsize_col = 10,
main = paste0(title, "\n(Significant correlations only, p < 0.05)"),
treeheight_row = 20,
treeheight_col = 0
)
}
# Function to create correlation summary table
create_correlation_table <- function(correlation_results, title = "Taxa-Mortality Correlation Results") {
# Set seed for reproducibility
set.seed(42)
if (nrow(correlation_results) == 0) {
return(NULL)
}
# Create summary table
summary_table <- correlation_results %>%
dplyr::mutate(
Correlation = round(Correlation, 3),
P_value = format.pval(P_value, digits = 3)
) %>%
dplyr::select(Taxon, Correlation, P_value, Significant, Direction, N) %>%
gt::gt() %>%
gt::tab_header(
title = title,
subtitle = "Correlation between differentially abundant taxa and mortality"
) %>%
gt::cols_label(
Taxon = "Taxon",
Correlation = "Correlation",
P_value = "P-value",
Significant = "Significant",
Direction = "Direction",
N = "Sample Size"
) %>%
gt::tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()
) %>%
gt::fmt_number(
columns = c(Correlation),
decimals = 3
) %>%
gt::tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = "Taxon")
) %>%
gt::tab_style(
style = cell_fill(color = "lightgreen"),
locations = cells_body(
columns = "Significant",
rows = Significant == TRUE
)
)
return(summary_table)
}
# Redefine the function to ensure it's available
extract_top_taxa <- function(maaslin_results, taxa_by_tank, top_n = 10) {
# Set seed for reproducibility
set.seed(42)
if (is.null(maaslin_results) || nrow(maaslin_results) == 0) {
return(character(0))
}
# Get available taxa columns from taxa_by_tank
available_taxa <- names(taxa_by_tank)[!names(taxa_by_tank) %in% c("Treatment", "Tank.ID")]
# Get top taxa from MaAsLin2 results
top_taxa <- maaslin_results %>%
dplyr::filter(qval < 0.05) %>%
dplyr::arrange(qval) %>%
dplyr::slice_head(n = top_n) %>%
dplyr::pull(Taxon) %>%
unique()
# Filter to only include taxa that are available in taxa_by_tank
available_top_taxa <- top_taxa[top_taxa %in% available_taxa]
# If we don't have enough available taxa, add more from the results
if (length(available_top_taxa) < top_n && length(top_taxa) > length(available_top_taxa)) {
remaining_taxa <- top_taxa[!top_taxa %in% available_top_taxa]
additional_needed <- top_n - length(available_top_taxa)
additional_taxa <- remaining_taxa[1:min(additional_needed, length(remaining_taxa))]
available_top_taxa <- c(available_top_taxa, additional_taxa)
}
# Print information about matching
cat("Found", length(top_taxa), "significant taxa in MaAsLin2 results\n")
cat("Available in taxa_by_tank:", length(available_top_taxa), "taxa\n")
if (length(top_taxa) > length(available_top_taxa)) {
missing_taxa <- top_taxa[!top_taxa %in% available_top_taxa]
cat("Missing taxa:", paste(missing_taxa[1:min(5, length(missing_taxa))], collapse = ", "))
if (length(missing_taxa) > 5) cat(" ... and", length(missing_taxa) - 5, "more")
cat("\n")
}
return(available_top_taxa)
}
create_taxa_mortality_plots <- function(correlation_results,
taxa_data,
mortality_data,
title_prefix = "") {
#––– 1. keep only significant taxa –––#
sig_taxa <- correlation_results %>%
dplyr::filter(P_value < 0.05) %>%
dplyr::pull(Taxon)
if (length(sig_taxa) == 0L) {
cli::cli_alert_warning("No significant correlations for plotting.")
return(NULL)
}
plot_list <- vector("list", length(sig_taxa))
names(plot_list) <- sig_taxa
set.seed(42)
for (taxon in sig_taxa) {
#––– 2. prep abundance data –––#
plot_data <- taxa_data %>%
dplyr::select(Treatment, Tank.ID, !!taxon) %>%
dplyr::rename(log2_abund = !!taxon) %>%
dplyr::mutate(
Taxon_Abund_pct = 2 ^ log2_abund * 100 # convert log2-abundance to %
)
max_abund <- max(plot_data$Taxon_Abund_pct, na.rm = TRUE)
#––– 3. scale mortality to primary y-axis –––#
# mort_scaled <- mortality_data %>%
# dplyr::mutate(
# Mort_scaled = percent_mortality / 50 * max_abund, # bar height
# label_y = Mort_scaled + 0.02 * max_abund, # text just above bar
# label_txt = paste0(round(percent_mortality, 0), "%") # one decimal place
# )
mort_scaled <- mortality_data %>%
dplyr::mutate(
Mort_scaled = if_else(percent_mortality / 50 * max_abund <= 0, 1e-4, percent_mortality / 100 * max_abund), # percent_mortality / 50 * max_abund , # bar height
label_y = Mort_scaled * 1.05, #Mort_scaled + 0.02 * max_abund, # text just above bar
label_txt = paste0(round(percent_mortality, 0), "%") # one decimal place
)
#––– 4. plotting –––#
dodge <- ggplot2::position_dodge(width = 0.8)
jitdod <- ggplot2::position_jitterdodge(dodge.width = 0.8,
jitter.width = 0.15,
jitter.height = 0)
ymax <- max(c(max_abund, mort_scaled$label_y), na.rm = TRUE) * 1.05
p <- ggplot2::ggplot() +
## 4a. mortality bars
ggplot2::geom_col(
data = mort_scaled,
ggplot2::aes(x = Treatment,
y = Mort_scaled,
group = Tank.ID),
position = dodge,
fill = "red",
alpha = 0.25,
width = 0.7
) +
## 4b. mortality labels
ggplot2::geom_text(
data = mort_scaled,
ggplot2::aes(x = Treatment,
y = label_y,
group = Tank.ID,
label = label_txt),
position = dodge,
size = 3,
vjust = 0
) +
## 4d. individual points
ggplot2::geom_point(
data = plot_data,
ggplot2::aes(x = Treatment,
y = Taxon_Abund_pct,
group = Tank.ID,
color = Treatment,
fill = Treatment),
position = jitdod,
size = 4,
shape = 21,
alpha = .25
) +
ggplot2::geom_point(
data = plot_data,
ggplot2::aes(x = Treatment,
y = Taxon_Abund_pct,
group = Tank.ID,
color = Treatment),
position = jitdod,
size = 4,
shape = 21
) +
## 4e. scales & themes
ggplot2::scale_fill_manual(values = treatment_color_scale) +
ggplot2::scale_color_manual(values = treatment_color_scale) +
ggplot2::scale_y_continuous(
name = glue::glue("Relative Abundance of {taxon} (%)"),
limits = c(0, ymax),
breaks = scales::pretty_breaks(8)
) +
# ggplot2::scale_y_continuous(
# trans = "log2",
# breaks = log2(1 / 2^(0:18)),
# labels = function(x) paste0(100 * round(2^x, 5), "%")
# )
# facet_wrap(.~stringr::str_detect(as.character(Treatment), "P\\+"), scales = "free_x") +
ggplot2::labs(
title = glue::glue("{title_prefix} : {taxon}"),
subtitle = glue::glue(
"Correlation with mortality: r = {round(correlation_results$Correlation[correlation_results$Taxon == taxon], 2)}, ",
"p = {scales::pvalue(correlation_results$P_value[correlation_results$Taxon == taxon], accuracy = .001)}"
),
x = "Treatment",
y = glue::glue("Relative Abundance of {taxon} (%)"),
caption = "Bars = Percent Mortality; Dots = average relative abundance (TSS) of taxon."
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
legend.position = "none"
)
plot_list[[taxon]] <- p
}
plot_list
}
Import Data
ps.unclean <- readRDS('/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Data/Robjects/pseq_uncleaned_05052025.rds')
ps.tmp <- readRDS("/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Data/Robjects/pseq_uncleaned_05052025.rds")
ps.cleaned <-
ps.tmp %>%
## Update Metadata
ps_rename(Time = Timepoint) %>%
microViz::ps_mutate(
Treatment = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 0 ~ "A- T- P-",
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "A- T- P+",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "A+ T- P-",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "A+ T- P+",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "A- T+ P-",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "A- T+ P+",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "A+ T+ P-",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "A+ T+ P+",
TRUE ~ "Unknown"
), .after = "Pathogen"
) %>%
microViz::ps_mutate(Sample = fecal.sample.number, .before = 1) %>%
microViz::ps_mutate(Sample = gsub("^f", "", Sample)) %>%
microViz::ps_filter(Treatment != "Unknown") %>%
microViz::ps_mutate(
History = case_when(
Antibiotics + Temperature == 0 ~ 0,
Antibiotics + Temperature == 1 ~ 1,
Antibiotics + Temperature == 2 ~ 2,
), .after = "Treatment"
) %>%
## Additional metadata updates, factorizing metadata
microViz::ps_mutate(
# Create treatment code
treatment_code = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 0 ~ "Aneg_Tneg_Pneg",
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "Aneg_Tneg_Ppos",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "Apos_Tneg_Pneg",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "Apos_Tneg_Ppos",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "Aneg_Tpos_Pneg",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "Aneg_Tpos_Ppos",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "Apos_Tpos_Pneg",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "Apos_Tpos_Ppos"
),
# Create treatment group factor
treatment_group = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "Parasite",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "Antibiotics",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "Antibiotics_Parasite",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "Temperature",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "Temperature_Parasite",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "Antibiotics_Temperature",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "Antibiotics_Temperature_Parasite",
TRUE ~ "Control"
),
# Convert to factor with appropriate levels
treatment_group = factor(treatment_group,
levels = c("Control", "Parasite",
"Antibiotics", "Antibiotics_Parasite",
"Temperature", "Temperature_Parasite",
"Antibiotics_Temperature", "Antibiotics_Temperature_Parasite")
),
treatment_code = factor(treatment_code, levels = treatment_order),
# Create time point factor
time_point = factor(Time, levels = c(0, 14, 18, 25, 29, 60)),
# Create pathogen status factor
pathogen_status = factor(ifelse(Pathogen == 1, "Exposed", "Unexposed"),
levels = c("Unexposed", "Exposed")),
# Create sex factor
sex = factor(Sex, levels = c("M", "F"))
) %>%
microViz::ps_mutate(Treatment = factor(Treatment, levels = treatment_order)) %>%
microViz::ps_mutate(Exp_Type = case_when(
Treatment %in% c("A- T- P-", "A- T- P+") ~ "No prior stressor(s)",
Treatment %in% c("A+ T- P-", "A+ T- P+") ~ "Antibiotics",
Treatment %in% c("A- T+ P-", "A- T+ P+") ~ "Temperature",
Treatment %in% c("A+ T+ P-", "A+ T+ P+") ~ "Combined",
)) %>%
microViz::ps_mutate(Exp_Type = factor(Exp_Type, levels = c("No prior stressor(s)", "Antibiotics", "Temperature", "Combined"))) %>%
# Fix names for taxonomic ranks not identified
microViz::tax_fix(suffix_rank = "current", anon_unique = T, unknown = NA) %>%
# Filter for any samples that contain more than 5000 reads
microViz::ps_filter(sample_sums(.) > 5000) %>%
# Any taxa not found in at least 3 samples are removed
microViz::tax_filter(min_prevalence = 3, undetected = 0) %>%
# Remove any unwanted reads
microViz::tax_select(c("Mitochondria", "Chloroplast", "Eukaryota"), deselect = TRUE) %>%
microViz::tax_select(c("Bacteria, Phylum"), deselect = TRUE)
# Create a dataframe for mortality using the "unclean" phyloseq object
# The unclean phyloseq object is prior to removing samples during post-DADA2 processing (e.g., insufficient reads/sample)
data.mortality <-
ps.unclean %>%
## Update Metadata
ps_rename(Time = Timepoint) %>%
microViz::ps_mutate(
Treatment = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 0 ~ "A- T- P-",
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "A- T- P+",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "A+ T- P-",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "A+ T- P+",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "A- T+ P-",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "A- T+ P+",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "A+ T+ P-",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "A+ T+ P+",
TRUE ~ "Unknown"
), .after = "Pathogen"
) %>%
microViz::ps_mutate(Sample = fecal.sample.number, .before = 1) %>%
microViz::ps_mutate(Sample = gsub("^f", "", Sample)) %>%
microViz::ps_filter(Treatment != "Unknown") %>%
microViz::ps_mutate(
History = case_when(
Antibiotics + Temperature == 0 ~ 0,
Antibiotics + Temperature == 1 ~ 1,
Antibiotics + Temperature == 2 ~ 2,
), .after = "Treatment"
) %>%
## Additional metadata updates, factorizing metadata
microViz::ps_mutate(
# Create treatment code
treatment_code = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 0 ~ "Aneg_Tneg_Pneg",
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "Aneg_Tneg_Ppos",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "Apos_Tneg_Pneg",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "Apos_Tneg_Ppos",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "Aneg_Tpos_Pneg",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "Aneg_Tpos_Ppos",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "Apos_Tpos_Pneg",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "Apos_Tpos_Ppos"
),
# Create treatment group factor
treatment_group = case_when(
Antibiotics == 0 & Temperature == 0 & Pathogen == 1 ~ "Parasite",
Antibiotics == 1 & Temperature == 0 & Pathogen == 0 ~ "Antibiotics",
Antibiotics == 1 & Temperature == 0 & Pathogen == 1 ~ "Antibiotics_Parasite",
Antibiotics == 0 & Temperature == 1 & Pathogen == 0 ~ "Temperature",
Antibiotics == 0 & Temperature == 1 & Pathogen == 1 ~ "Temperature_Parasite",
Antibiotics == 1 & Temperature == 1 & Pathogen == 0 ~ "Antibiotics_Temperature",
Antibiotics == 1 & Temperature == 1 & Pathogen == 1 ~ "Antibiotics_Temperature_Parasite",
TRUE ~ "Control"
),
# Convert to factor with appropriate levels
treatment_group = factor(treatment_group,
levels = c("Control", "Parasite",
"Antibiotics", "Antibiotics_Parasite",
"Temperature", "Temperature_Parasite",
"Antibiotics_Temperature", "Antibiotics_Temperature_Parasite")
),
treatment_code = factor(treatment_code, levels = treatment_order),
# Create time point factor
time_point = factor(Time, levels = c(0, 14, 18, 25, 29, 60)),
# Create pathogen status factor
pathogen_status = factor(ifelse(Pathogen == 1, "Exposed", "Unexposed"),
levels = c("Unexposed", "Exposed")),
# Create sex factor
sex = factor(Sex, levels = c("M", "F"))
) %>%
microViz::ps_mutate(Treatment = factor(Treatment, levels = treatment_order)) %>%
microViz::ps_mutate(Exp_Type = case_when(
Treatment %in% c("A- T- P-", "A- T- P+") ~ "No prior stressor(s)",
Treatment %in% c("A+ T- P-", "A+ T- P+") ~ "Antibiotics",
Treatment %in% c("A- T+ P-", "A- T+ P+") ~ "Temperature",
Treatment %in% c("A+ T+ P-", "A+ T+ P+") ~ "Combined",
)) %>%
microViz::ps_mutate(Exp_Type = factor(Exp_Type, levels = c("No prior stressor(s)", "Antibiotics", "Temperature", "Combined"))) %>%
microViz::samdat_tbl()
# Create mortality data by treatment and tank
mortality_by_tank <- data.mortality %>%
dplyr::group_by(Treatment, Tank.ID) %>%
dplyr::summarise(
n_fish = n(),
n_infected = sum(Total.Worm.Count > 0, na.rm = TRUE),
mean_worm_burden = mean(Total.Worm.Count, na.rm = TRUE),
percent_infected = round((n_infected / n_fish) * 100, 1),
.groups = "drop"
) %>%
dplyr::mutate(
# Calculate mortality (assuming 90 fish per tank initially)
total_per_group = 30,
dead = total_per_group - n_fish,
percent_mortality = round((dead / total_per_group) * 100, 1),
# Convert Treatment to factor with specific order
Treatment = factor(Treatment, levels = treatment_order)
)
# Normalize taxa counts
ps.normalized <- normalize_taxa_counts(ps.cleaned)
# Extract taxa abundance data by treatment and tank
taxa_by_tank <- ps.normalized %>%
microViz::ps_filter(Time == 60) %>%
microViz::ps_melt() %>%
dplyr::group_by(Treatment, Tank.ID, Genus) %>%
dplyr::summarise(
mean_abundance = mean(Abundance, na.rm = TRUE),
.groups = "drop"
) %>%
tidyr::pivot_wider(
id_cols = c(Treatment, Tank.ID),
names_from = Genus,
values_from = mean_abundance,
values_fill = 0
)
## Warning in tax_filter_zeros(ps): Removing taxa whose abundance across filtered samples is equal to zero.
## This may not result in the desired outcome, as some values in the otu_table are negative.
## Avoid performing transformations, e.g. clr, before using `ps_filter()`, or set .keep_all_taxa = TRUE
## Warning in microViz::ps_melt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
Load MaAsLin2 Results
# Load MaAsLin2 results for different analyses
maaslin_results <- list()
# Question 0: All treatments
all_file <- '/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/Analysis/DiffAbund/Results/All_60DPE__TREATMENT__significant_results.tsv'
if (file.exists(all_file)) {
maaslin_results[["All"]] <- readr::read_tsv(all_file) %>%
dplyr::rename(Taxon = feature) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "\\.", "-")) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "_", " ")) %>%
dplyr::arrange(qval)
cat("Loaded All treatments results with", nrow(maaslin_results[["All"]]), "significant taxa\n")
} else {
warning("MaAsLin2 results file not found for All treatments: ", all_file)
maaslin_results[["All"]] <- NULL
}
## Rows: 610 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): feature, metadata, value
## dbl (6): coef, stderr, N, N.not.0, pval, qval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Loaded All treatments results with 610 significant taxa
# Question 1: Parasite Exposure Response
parasite_file <- '/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/Analysis/DiffAbund/Results/ParasiteExp_60DPE__TREATMENT__significant_results.tsv'
if (file.exists(parasite_file)) {
maaslin_results[["Parasite"]] <- readr::read_tsv(parasite_file) %>%
dplyr::rename(Taxon = feature) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "\\.", "-")) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "_", " ")) %>%
dplyr::arrange(qval)
cat("Loaded Parasite exposure results with", nrow(maaslin_results[["Parasite"]]), "significant taxa\n")
} else {
warning("MaAsLin2 results file not found for Parasite exposure: ", parasite_file)
maaslin_results[["Parasite"]] <- NULL
}
## Rows: 76 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): feature, metadata, value
## dbl (6): coef, stderr, N, N.not.0, pval, qval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Loaded Parasite exposure results with 76 significant taxa
# Question 2: Historical Contingency of Parasite Response
hist_parasite_file <- '/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/Analysis/DiffAbund/Results/PriorStressParaExp_60DPE__TREATMENT__significant_results.tsv'
if (file.exists(hist_parasite_file)) {
maaslin_results[["Historical_Parasite"]] <- readr::read_tsv(hist_parasite_file) %>%
dplyr::rename(Taxon = feature) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "\\.", "-")) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "_", " ")) %>%
dplyr::arrange(qval)
cat("Loaded Historical contingency of parasite response results with", nrow(maaslin_results[["Historical_Parasite"]]), "significant taxa\n")
} else {
warning("MaAsLin2 results file not found for Historical contingency of parasite response: ", hist_parasite_file)
maaslin_results[["Historical_Parasite"]] <- NULL
}
## Rows: 97 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): feature, metadata, value
## dbl (6): coef, stderr, N, N.not.0, pval, qval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Loaded Historical contingency of parasite response results with 97 significant taxa
# Question 3: Historical Contingency of Recovery
hist_recovery_file <- '/Users/michaelsieler/Dropbox/Mac (2)/Documents/Sharpton_Lab/Projects_Repository/Rules_of_Life/major-experiment-2023/Code/Analysis/DiffAbund/Results/PriorStressNoParaExp_60DPE__TREATMENT__significant_results.tsv'
if (file.exists(hist_recovery_file)) {
maaslin_results[["Historical_Recovery"]] <- readr::read_tsv(hist_recovery_file) %>%
dplyr::rename(Taxon = feature) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "\\.", "-")) %>%
dplyr::mutate(Taxon = stringr::str_replace_all(Taxon, "_", " ")) %>%
dplyr::arrange(qval)
cat("Loaded Historical contingency of recovery results with", nrow(maaslin_results[["Historical_Recovery"]]), "significant taxa\n")
} else {
warning("MaAsLin2 results file not found for Historical contingency of recovery: ", hist_recovery_file)
maaslin_results[["Historical_Recovery"]] <- NULL
}
## Rows: 196 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): feature, metadata, value
## dbl (6): coef, stderr, N, N.not.0, pval, qval
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Loaded Historical contingency of recovery results with 196 significant taxa
# Summary of loaded results
cat("\n=== MaAsLin2 Results Summary ===\n")
##
## === MaAsLin2 Results Summary ===
for (analysis_name in names(maaslin_results)) {
if (!is.null(maaslin_results[[analysis_name]])) {
cat(analysis_name, ": ", nrow(maaslin_results[[analysis_name]]), " significant taxa\n")
} else {
cat(analysis_name, ": No data loaded\n")
}
}
## All : 610 significant taxa
## Parasite : 76 significant taxa
## Historical_Parasite : 97 significant taxa
## Historical_Recovery : 196 significant taxa
cat("===============================\n\n")
## ===============================